
/* NOTE: This file must be run on the NBER server */


* ---------------------------------------------------------------------------
* Session setup: start from an empty dataset, fix the Stata settings used by
* this script, define the directory globals, and open the text log.
* ---------------------------------------------------------------------------
clear

* Session settings. The seed makes the random draws below start from a fixed
* state; logtype must be set before the log is opened.
set more off
set logtype text
set matsize 800
set seed 1234567

* Directory holding the regional input .dta files read below
global fullcount_files /homes/data/cens1940.work/olivetti/msjohn/dtafiles/output
* Directory where the extracts are written
global myfiles /homes/data/cens1940.work/olivetti/lsalisbu
* The log is written to the same directory as the extracts
global logfiles_dir $myfiles

* Close any log left open by an earlier run, then start a fresh one
capture log close
log using "$logfiles_dir/004_create_1pct_extract.log", replace

* ---------------------------------------------------------------------------
* For each region file: make two independent household-level draws, each
* flagging households whose uniform draw is <= 0.01 (samp1, samp2), keep every
* person in a household flagged by either draw, and save the regional extract.
* The draws consume the random-number stream in sequence, so the order of the
* commands below must not be changed.
* ---------------------------------------------------------------------------
local regions west midwest south northeast

foreach region of local regions {
   use "$fullcount_files/`region'.dta", clear

   * Use hhid_numeric as the household id name; capture ignores the error
   * raised when the rename cannot be done (e.g. hhid is not in the file).
   capture rename hhid hhid_numeric

   * Flag exactly one observation per distinct household id, then store the
   * id as a string.
   egen tag_hhid = tag(hhid_numeric)
   tostring hhid_numeric, replace

   forvalues draw = 1/2 {
      * Remove the previous draw's work variables before redrawing.
      * (Those from the final draw are left in the data, as is tag_hhid.)
      if `draw' > 1 {
         drop u uu
      }
      * One uniform draw per household, placed on the tagged observation ...
      generate u = uniform() if tag_hhid == 1
      * ... and copied to every member of the household via the group mean
      egen uu = mean(u), by(hhid_numeric)
      generate samp`draw' = (uu <= 0.01)
   }

   * Retain households selected by at least one of the two draws
   keep if samp1 | samp2

   * Drop countyicp when it is present
   capture drop countyicp
   save "$myfiles/`region'_1pct.dta", replace
}


* ---------------------------------------------------------------------------
* Stack the four regional extracts (northeast first, then midwest, south,
* west) into a single dataset, save it, and close the log.
* ---------------------------------------------------------------------------
use "$myfiles/northeast_1pct.dta", clear

foreach region in midwest south west {
   append using "$myfiles/`region'_1pct.dta"
}

save "$myfiles/ALL_1pct.dta", replace

log close


